{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b30f168a-427c-4b8f-aead-bdd2edd61c4c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from linearmodels import PanelOLS\n",
    "from linearmodels.panel import PooledOLS\n",
    "import statsmodels.api as sm\n",
    "from tabulate import tabulate\n",
    "import os\n",
    "\n",
    "\n",
    "cd_data = '.../Data/Intraday/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1a5cf608-8658-4291-b4b2-88192bce53a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load main datasets\n",
    "# Firm sentiment\n",
    "df_firm = pd.read_parquet(cd_data + 'Congress_tweets.parquet')\n",
    "df_firm['Date'] = pd.to_datetime(df_firm['Date'])\n",
    "\n",
    "# first decile\n",
    "break_low_df = df_firm[['tone']].describe(percentiles=[0.1]).T[['10%']].rename(columns={'10%':'Low'})\n",
    "\n",
    "# last decile\n",
    "break_high_df = df_firm[['tone']].describe(percentiles=[0.9]).T[['90%']].rename(columns={'90%':'High'})\n",
    "breaks_temp = pd.merge(break_low_df, break_high_df, how = 'inner', left_index = True, right_index = True)\n",
    "\n",
    "\n",
    "# Sort into the first and last decile of tone measure\n",
    "df_firm['bins'] = 0 \n",
    "df_firm['bins'] = np.where(df_firm['tone'] < breaks_temp['Low'][0], -1, df_firm['bins'])   \n",
    "df_firm['bins'] = np.where(df_firm['tone'] > breaks_temp['High'][0], 1, df_firm['bins'])   \n",
    "\n",
    "\n",
    "df_cum_returns =  pd.read_parquet(cd_data + 'CumulativeReturns_around_tweets_(minus)20m_to_90m.parquet')    \n",
    "\n",
    "df_reg = pd.merge(df_cum_returns, df_firm[['Date','permno','bins']], how = 'left', on = ['Date','permno'])\n",
    "df_reg = df_reg.dropna(subset = ['bins'])\n",
    "\n",
    "min_announcements = list(df_reg['time_difference'].unique())\n",
    "results_df_all = pd.DataFrame()\n",
    "for index_min in min_announcements:\n",
    "    df_bins_reg = df_reg[df_reg['time_difference'] == index_min]    \n",
    "    cols_to_dummies = \"bins\" \n",
    "\n",
    "    df_with_dummies = pd.get_dummies(df_bins_reg, columns=[cols_to_dummies], drop_first=False)\n",
    "    df_bins_surprise= pd.concat([df_bins_reg[cols_to_dummies], df_with_dummies], axis=1)\n",
    "\n",
    "    columns_dummy  = [x for x in df_bins_surprise if x.startswith('bins_')]\n",
    "\n",
    "    df_plot_reg = df_bins_surprise[['ticker','Date','price_norm'] +columns_dummy ]\n",
    "    df_plot_reg = df_plot_reg.set_index(['ticker','Date']) \n",
    "\n",
    "    df_plot_reg = df_plot_reg.dropna(subset = columns_dummy + ['price_norm'])\n",
    "    exog = df_plot_reg[columns_dummy ]\n",
    "    enog  = df_plot_reg['price_norm'] \n",
    "\n",
    "    mod = PanelOLS(enog, exog, entity_effects=True)\n",
    "    results = mod.fit(cov_type='clustered' , cluster_entity = True, cluster_time=True)\n",
    "\n",
    "    params_est =pd.DataFrame(results.params)  \n",
    "    tvals_est = pd.DataFrame(results.tstats) \n",
    "    std_errors = pd.DataFrame(results.std_errors) \n",
    "\n",
    "    results_df = pd.merge(params_est,tvals_est, how = 'inner', left_index = True, right_index = True)\n",
    "    results_df = pd.merge(results_df,std_errors, how = 'inner', left_index = True, right_index = True).T\n",
    "    results_df['Minute'] = index_min\n",
    "\n",
    "    results_df_all = pd.concat([results_df_all,results_df])\n",
    "\n",
    "# Plot the cumulative returns for the first and last decile\n",
    "def get_df_plot(df_t,select_var,select_sig):\n",
    "    \n",
    "    df_t = df_t[['Minute', select_var]]\n",
    "    df_t_param = df_t[df_t.index == 'parameter'].reset_index(drop = True).set_index(['Minute'])\n",
    "    df_t_param.columns = ['a']\n",
    "    df_t_param = df_t_param.reset_index()\n",
    "   \n",
    "\n",
    "    df_t_se = df_t[df_t.index == 'std_error'].reset_index(drop = True).set_index(['Minute'])\n",
    "    df_t_se.columns = ['a_se']\n",
    "    df_t_se = df_t_se.reset_index()\n",
    "\n",
    "    df_t_2 = pd.merge(df_t_param, df_t_se, how = 'outer', on = 'Minute')\n",
    "\n",
    "    df_t_2['lb'] = df_t_2['a'] - select_sig*df_t_2['a_se']\n",
    "    df_t_2['ub'] = df_t_2['a'] + select_sig*df_t_2['a_se']\n",
    "\n",
    "    return df_t_2\n",
    "\n",
    "df_plot = results_df_all.copy().sort_values(['Minute'])\n",
    "select_sig = 2.0\n",
    "\n",
    "select_var = 'bins_-1.0'\n",
    "df_low = get_df_plot(df_plot,select_var,select_sig)\n",
    "\n",
    "select_var = 'bins_1.0'\n",
    "df_high = get_df_plot(df_plot,select_var,select_sig)\n",
    "\n",
    "# plot\n",
    "fig, axes = plt.subplots(1, 1 , figsize=(16,6), sharex=True, sharey=False)\n",
    "xdata_low = df_low['Minute'].to_numpy()\n",
    "ydata_low = df_low['a'].to_numpy()\n",
    "ydata_lb_low = df_low['ub'].to_numpy() \n",
    "ydata_ub_low = df_low['lb'].to_numpy() \n",
    "plt.plot(xdata_high, ydata_high, '-',color = 'darkblue', linewidth=3)\n",
    "\n",
    "xdata_high = df_high['Minute'].to_numpy()\n",
    "ydata_high = df_high['a'].to_numpy()\n",
    "ydata_lb_high = df_high['ub'].to_numpy()\n",
    "ydata_ub_high = df_high['lb'].to_numpy()\n",
    "plt.plot(xdata_low, ydata_low, '-',color = 'tab:red', linewidth=3)\n",
    "\n",
    "plt.legend(labels=['Supportive tone', 'Critical tone'], loc='upper right'  , fontsize='25',frameon=False,ncol=2, columnspacing=1)\n",
    "\n",
    "# cofidence bands\n",
    "plt.fill_between(xdata_high, ydata_lb_high, ydata_ub_high, color='darkblue', alpha=0.1)\n",
    "plt.fill_between(xdata_low, ydata_lb_low, ydata_ub_low, color='tab:red', alpha=0.1)\n",
    "\n",
    "plt.ylabel('Cumulative price change in basis points', fontsize='20')\n",
    "plt.xlabel('Minutes around the event', fontsize='20')\n",
    "axes.spines['top'].set_visible(False)\n",
    "axes.spines['right'].set_visible(False)  \n",
    "axes.xaxis.set_tick_params(labelsize=19)\n",
    "axes.yaxis.set_tick_params(labelsize=19)\n",
    "\n",
    "plt.ylim([-13, 13])\n",
    "plt.xlim([-20,90])\n",
    "plt.xticks([-20,-10,-5, 0, 5, 10, 20,30 , 60,80,90])\n",
    "\n",
    "plt.axhline(y=0,linestyle =':',color='black',label=r'Announcement',lw=2) \n",
    "plt.axvline(x=0,linestyle =':',color='black',label=r'Announcement',lw=2) \n",
    "plt.tight_layout()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
